*Run the two IPUMS extract do-files in turn, starting each from an empty dataset:
*usa_00097.do is the 1850 extract and usa_00114.do is the 1860 extract
foreach extract in "usa_00097.do" "usa_00114.do" {
    clear
    do "`extract'"
}


***************************
*1850 Census: load the full file and build person-level indicators before the
*merge to the county crosswalk and the normal school/asylum counties

use "1850_full.dta", clear

*Exclude people living in group quarters (gq codes 3 and 4); tabulate first for the log
tab gq
drop if inlist(gq, 3, 4)

*Sex and race indicators, left missing when the source field is missing
gen male1850 = sex==1 if sex!=.
gen white1850 = race==1 if race!=.

*Parents' combined real property (missing only when both parents' values are missing)
*and a flag for having a non-missing value
egen realprop_parents = rowtotal(realprop_pop realprop_mom), missing
gen nonmissingrealpropparents = realprop_parents!=.

keep histid age sex race male1850 white1850 stateicp countyicp perwt school ///
    occ1950 occ1950_pop occ1950_mom occscore_pop realprop_parents nonmissingrealpropparents

*Fraction living with parents nationally, by age: white males first (male1850==1),
*then white females (male1850==0)
*Want to use the fraction nationally since the children could move anywhere
foreach m in 1 0 {
    mean nonmissingrealpropparents if male1850==`m' & white1850==1, over(age)
}

*Suffix the census variables with the year they are from
foreach v of varlist histid realprop_parents age sex race stateicp countyicp ///
        occ1950_mom occ1950_pop school occ1950 occscore_pop perwt {
    rename `v' `v'1850
}

*Merge with Eckert crosswalk, which is keyed on ICPSR state and county codes
rename stateicp1850 icpsrst
rename countyicp1850 icpsrcty

joinby icpsrst icpsrcty using "EGLP_1850.dta", unmatched(both)
rename _merge merge1850crosswalk

*County FIPS built from the 1990 NHGIS state and county codes
gen cty_fips = nhgisst_1990*100 + (nhgiscty_1990/10)

*Keep counties that VA independent cities were a part of
*https://www.bea.gov/system/files/methodologies/LAPI-Methodology.pdf
gen VAcounties = inlist(cty_fips, 51053, 51121, 51165, 51177)

*For the VA counties, replace the surrounding county's FIPS with the FIPS of the independent city
*This will keep those records when merging to the Normal Asylum counties
replace cty_fips = 51630 if cty_fips==51177
replace cty_fips = 51660 if cty_fips==51165
replace cty_fips = 51730 if cty_fips==51053
replace cty_fips = 51750 if cty_fips==51121

*Crosswalk weights per county FIPS: total over distinct (cty_fips, ICPSR state, ICPSR county)
*combinations, and the largest single weight
egen tagctyfipsicpsr = tag(cty_fips icpsrst icpsrcty)
bysort cty_fips tagctyfipsicpsr: egen totweightx = total(weight) if tagctyfipsicpsr==1
bysort cty_fips: egen totweight = max(totweightx)

bysort cty_fips: egen maxweight = max(weight)

*Drop crosswalk-only rows, except those for the VA counties flagged above
drop if merge1850crosswalk==2 & VAcounties!=1

*Some IPUMS counties do not merge to the crosswalk. The ICPSR codes in IPUMS use present-day state
*boundaries, but the crosswalk uses the state at the time of the census.
*  - Anyone living in what became WV has a WV code in IPUMS but a VA code in the crosswalk, so they do
*    not merge and are dropped. This is fine since WV is not a state in 1850, which was our sample restriction.
*  - One TX county does not merge because in 1850 it was a combined set of counties with a different code
*    (Cameron-Starr-Webb in 1850; now three separate counties). IPUMS uses Cameron's code, which is not in
*    the crosswalk because the crosswalk uses the combined code. None of the counties that got people from
*    Cameron-Starr-Webb in the crosswalk were normal or asylum counties, so this is not an issue for us.
*  - Similarly for Louisiana. Georgia also has an example of this, but it is not in our dataset since
*    it does not have at least one normal school and asylum.

drop if merge1850crosswalk==1

*Keep normal school and asylum counties only
merge m:1 cty_fips using "justnormasylum.dta", gen(mergenormasylum)
keep if mergenormasylum==3

rename cty_fips cty_fips1850

drop year nhgisst nhgiscty statenam nhgisnam area_base statenam_1990 nhgisnam_1990 icpsrst_1990 icpsrcty_1990 area us_state
foreach v of varlist hasnormalschool hasnormalorasylum mergenormasylum {
    rename `v' `v'1850
}

*State FIPS = the county FIPS with its last three digits removed (4- or 5-digit codes only)
tostring cty_fips1850, gen(cty_fipstr)
gen statefip1850 = substr(cty_fipstr, 1, length(cty_fipstr)-3) if inlist(length(cty_fipstr), 4, 5)
destring statefip1850, replace

*Keep states that did not have schools by 1860: AL, AR, AZ, CA, CO, FL, GA, IA, ID, IN, KS,
*KY, LA, MD, ME, MN, MO, MS, MT, NC, ND, NE, NH, NM, OH, OK, OR, RI, SC, SD, TN, TX, UT, VA,
*VT, WA, WI, WV. DE, which did not have a normal school, is not in the list and so is dropped here.
*The full variable name statefip1850 is used throughout rather than the abbreviation statefip,
*so these filters do not depend on variable abbreviation being enabled and unambiguous.

keep if inlist(statefip1850, 1, 4, 5, 6, 8, 12, 13, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, ///
    30, 31, 33, 35, 37, 38, 39, 40, 41, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55)

*Drop states that were not yet states on Census Day 1850:

drop if inlist(statefip1850, 4, 6, 8, 16, 27, 30, 31, 32, 35, 38, 40, 41, 46, 49, 53, 56)

*Drop Georgia, RI, SC (13, 44, 45) because no asylum counties

drop if inlist(statefip1850, 13, 44, 45)

tab statefip1850

save "1850_small.dta", replace

******************************
*1860 Census

use "1860_full.dta", clear

*Parents' combined real property (missing only when both parents' values are missing)
*and a flag for having a non-missing value
egen realprop_parents = rowtotal(realprop_pop realprop_mom), missing
gen nonmissingrealpropparents = realprop_parents!=.

keep gq histid race age sex stateicp countyicp school occ1950 occscore realprop persprop ///
    school_sp occ1950_sp realprop_sp persprop_sp realprop_parents nonmissingrealpropparents

*Suffix every kept variable with the year it is from
foreach v of varlist gq histid race realprop_parents nonmissingrealpropparents age sex ///
        stateicp countyicp school occ1950 occscore realprop persprop ///
        school_sp occ1950_sp realprop_sp persprop_sp {
    rename `v' `v'1860
}

******************************************************
*Get the distribution of wealth in 1860 in the states in the sample
*Everything between preserve and restore only feeds the two tabulations at the end;
*the file saved below is the full renamed 1860 data
preserve

*Exclude group quarters (gq codes 3 and 4)
drop if inlist(gq1860, 3, 4)

*Use 25-28 because some of the 16 year-olds in 1850 may be 25 in 1860
keep if inrange(age1860, 25, 28)
keep if race1860==1

*Now merge with Eckert 1860 crosswalk
rename stateicp1860 icpsrst
rename countyicp1860 icpsrcty

joinby icpsrst icpsrcty using "EGLP_1860.dta", unmatched(both)
rename _merge merge1860crosswalk

*Rebuild the county FIPS from the 1990 NHGIS codes, then take the state FIPS as the
*county FIPS with its last three digits removed (4- or 5-digit codes only)
drop cty_fips
gen cty_fips1860 = nhgisst_1990*100 + (nhgiscty_1990/10)
tostring cty_fips1860, gen(cty_fipstr)
gen statefip1860 = substr(cty_fipstr, 1, length(cty_fipstr)-3) if inlist(length(cty_fipstr), 4, 5)
destring statefip1860, replace

*Replace LA state for the one county from IPUMS that does not merge so that we keep it
replace statefip1860 = 22 if statefip1860==. & icpsrst==45 & merge1860crosswalk==1

drop if merge1860crosswalk==1 & icpsrst!=45

*Keep states that did not have schools by 1860: AL, AR, AZ, CA, CO, FL, GA, IA, ID, IN, KS,
*KY, LA, MD, ME, MN, MO, MS, MT, NC, ND, NE, NH, NM, OH, OK, OR, RI, SC, SD, TN, TX, UT, VA,
*VT, WA, WI, WV, and drop DE which did not have a normal school

keep if inlist(statefip1860, 1, 4, 5, 6, 8, 12, 13, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, ///
    30, 31, 33, 35, 37, 38, 39, 40, 41, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55)

*Drop states that were not yet states on Census Day 1850:

drop if inlist(statefip1860, 4, 6, 8, 16, 27, 30, 31, 32, 35, 38, 40, 41, 46, 49, 53, 56)

*Drop Georgia, RI, SC because no asylum counties

drop if inlist(statefip1860, 13, 44, 45)

*Drop states with small samples that drop for columns 3-5 of Table 1

drop if inlist(statefip1860, 12, 19, 48, 55)

*Household wealth = own plus spouse's value, tabulated once per person record
egen taghistid = tag(histid1860)
egen hhrealprop = rowtotal(realprop1860 realprop_sp1860), missing
egen hhpersprop = rowtotal(persprop1860 persprop_sp1860), missing

tab hhrealprop if taghistid==1
tab hhpersprop if taghistid==1

restore

save "1860_small.dta", replace

*Merging the IPUMS censuses to the Census Tree crosswalks

*Convert the Census Tree 1850-1860 link file from csv to dta
clear
import delimited using "1850_1860.csv", varnames(1)

save "1850_1860.dta", replace

*Attach the 1850 census records to the links
merge 1:m histid1850 using "1850_small.dta", gen(mergetree1850)

gen merged1850 = mergetree1850==3
drop if mergetree1850==1

gen school1850U = school1850==2

*Race and father's-occupation indicators; missing when the source code is missing
*(and, for occupation, when it is 999)
gen black1850 = race1850==2 if race1850!=.

gen dad_unsk = inrange(occ1950_pop1850, 700, 970) if occ1950_pop1850!=. & occ1950_pop1850!=999

gen posoccdad = (occ1950_pop1850!=. & occ1950_pop1850!=999)

gen cty_fips = cty_fips1850

*Keep only the records that merge to the Census Tree and keep only the people living with parents
keep if mergetree1850==3
keep if nonmissingrealpropparents==1

*Merge to 1860 record; drop 1860 records with no link first, tag one row per 1850 person,
*then drop links with no 1860 record
merge m:1 histid1860 using "1860_small.dta", gen(mergetree1860)
drop if mergetree1860==2

egen taghistid1850 = tag(histid1850)

drop if mergetree1860==1

gen school1860U = school1860==2

*Constant used later as the (weighted) observation count
gen constant = 1

*1860 occupation groups from the occ1950 codes, for the person and for the spouse (_sp)
*Unskilled: codes 700-970; missing when the code is missing or 999
gen unsk1860 = inrange(occ19501860, 700, 970) if occ19501860!=. & occ19501860!=999

gen unsk_sp1860 = inrange(occ1950_sp1860, 700, 970) if occ1950_sp1860!=. & occ1950_sp1860!=999

*Farmer: codes 100 or 123; the household version is missing only when both codes are missing
gen farmer1860hh = (inlist(occ19501860, 100, 123) | inlist(occ1950_sp1860, 100, 123)) if !(occ19501860==. & occ1950_sp1860==.)

gen farmer1860 = inlist(occ19501860, 100, 123)

*White collar: codes 0-99 or 200-490
gen whitecollar1860 = inrange(occ19501860, 0, 99) | inrange(occ19501860, 200, 490)
gen whitecollar_sp1860 = inrange(occ1950_sp1860, 0, 99) | inrange(occ1950_sp1860, 200, 490)

*Craft/operative: codes 500-690
gen craftoper1860 = inrange(occ19501860, 500, 690)
gen craftoper_sp1860 = inrange(occ1950_sp1860, 500, 690)

*Non-occupational: codes 980-999
gen nonocc1860 = inrange(occ19501860, 980, 999)

gen nonocc_sp1860 = inrange(occ1950_sp1860, 980, 999)

*Code 983 singled out as its own indicator
gen occschool1860 = occ19501860==983

*Household-level group = max of own and spouse indicator; missing when both codes are missing
foreach grp in unsk craftoper whitecollar {
    egen `grp'hh = rowmax(`grp'1860 `grp'_sp1860)
    replace `grp'hh = . if occ19501860==. & occ1950_sp1860==.
}

*Household is non-occupational when the person is, and the spouse either is too or has no code
gen nonocchh = nonocc1860==1 & (nonocc_sp1860==1 | occ1950_sp1860==.)
replace nonocchh = . if occ19501860==. & occ1950_sp1860==.

*1850 occupation groups for father (pop) and mother (mom), using the same occ1950 code
*ranges as for 1860: unskilled 700-970, white collar 0-99 or 200-490,
*craft/operative 500-690, non-occupational 980-999, farmer 100 or 123
gen dadunsk1850 = inrange(occ1950_pop1850, 700, 970)
gen momunsk1850 = inrange(occ1950_mom1850, 700, 970)

gen dadwhitecollar1850 = inrange(occ1950_pop1850, 0, 99) | inrange(occ1950_pop1850, 200, 490)
gen momwhitecollar1850 = inrange(occ1950_mom1850, 0, 99) | inrange(occ1950_mom1850, 200, 490)

gen dadcraftoper1850 = inrange(occ1950_pop1850, 500, 690)
gen momcraftoper1850 = inrange(occ1950_mom1850, 500, 690)

gen dadnonocc1850 = inrange(occ1950_pop1850, 980, 999)
gen momnonocc1850 = inrange(occ1950_mom1850, 980, 999)

gen parent_farmer = (inlist(occ1950_pop1850, 100, 123) | inlist(occ1950_mom1850, 100, 123)) if !(occ1950_pop1850==. & occ1950_mom1850==.)

gen dad_farmer = inlist(occ1950_pop1850, 100, 123)

*Parent-level group = max of father and mother indicator; missing when both codes are missing
foreach grp in unsk craftoper whitecollar {
    egen parent`grp' = rowmax(dad`grp'1850 mom`grp'1850)
    replace parent`grp' = . if occ1950_pop1850==. & occ1950_mom1850==.
}

*Household is in an occupation group in 1860 that the parents were not in in 1850
gen occdiffhh = (unskhh==1 & parentunsk!=1) ///
    | (craftoperhh==1 & parentcraftoper!=1) ///
    | (whitecollarhh==1 & parentwhitecollar!=1) ///
    | (farmer1860hh==1 & parent_farmer!=1)

*Person is in an occupation group in 1860 that the father was not in in 1850
gen occdiffdad = (unsk1860==1 & dadunsk1850!=1) ///
    | (craftoper1860==1 & dadcraftoper1850!=1) ///
    | (whitecollar1860==1 & dadwhitecollar1850!=1) ///
    | (farmer1860==1 & dad_farmer!=1)

*Group-by-group versions: entered a group the parents were not in
gen farmerhhdiffpar = farmer1860hh==1 & parent_farmer!=1
gen whitecollarhhdiffpar = whitecollarhh==1 & parentwhitecollar!=1
gen craftoperhhdiffpar = craftoperhh==1 & parentcraftoper!=1
gen unskhhdiffpar = unskhh==1 & parentunsk!=1

*Entered a group the father was not in
gen farmerdiffdad = farmer1860==1 & dad_farmer!=1
gen whitecollardiffdad = whitecollar1860==1 & dadwhitecollar1850!=1
gen craftoperdiffdad = craftoper1860==1 & dadcraftoper1850!=1
gen unskdiffdad = unsk1860==1 & dadunsk1850!=1

*Left the father's group (excluding people who are non-occupational in 1860)
gen notfarmerdiffdad = farmer1860!=1 & dad_farmer==1 & nonocc1860!=1
gen notwhitecollardiffdad = whitecollar1860!=1 & dadwhitecollar1850==1 & nonocc1860!=1
gen notcraftoperdiffdad = craftoper1860!=1 & dadcraftoper1850==1 & nonocc1860!=1
gen notunskdiffdad = unsk1860!=1 & dadunsk1850==1 & nonocc1860!=1
gen notnondiffdad = nonocc1860!=1 & dadnonocc1850==1

gen farmeranddad = farmer1860==1 & dad_farmer==1

*Parents are non-occupational when both are, or when one is and the other has no code
gen parentnonocc = (dadnonocc1850==1 & momnonocc1850==1) ///
    | (dadnonocc1850==1 & occ1950_mom1850==.) ///
    | (momnonocc1850==1 & occ1950_pop1850==.)

*Proxy for being married in 1860: the spouse's school variable is non-missing
gen married1860 = school_sp1860~=.

*Household wealth = own plus spouse's value; missing only when both are missing
egen hhrealprop = rowtotal(realprop1860 realprop_sp1860), missing
egen hhpersprop = rowtotal(persprop1860 persprop_sp1860), missing

*Stata orders missing values above every number, so an unguarded ">" or ">=" test is true
*for missing. Every threshold indicator below therefore excludes missing values explicitly,
*so that unknown wealth is never coded as belonging to the wealthiest group.

*Two parental real-estate groups: 1 = at most 150, 2 = more than 150
gen pargroup2 = 1 if realprop_parents1850<=150
replace pargroup2 = 2 if realprop_parents1850>150 & !missing(realprop_parents1850)

*Generate top quartile indicators for real estate and personal estate
*(left missing when household wealth is missing)
gen hhrealpropgteq400 = hhrealprop>=400 if !missing(hhrealprop)
gen hhperspropgteq372 = hhpersprop>=372 if !missing(hhpersprop)

*Parent wealth distribution in 1850
*The helper do-file runs on a preserved copy, so whatever it does to the data is undone
preserve
do "parent_wealth1850.do"
restore

*Three parental real-estate groups: 0-150, 150-1000, above 1000
*(left missing when parental real estate is missing)
gen parzero150 = realprop_parents1850<=150 if !missing(realprop_parents1850)
gen par1501000 = realprop_parents1850>150 & realprop_parents1850<=1000 if !missing(realprop_parents1850)
gen pargt1000 = realprop_parents1850>1000 if !missing(realprop_parents1850)

save "1850_1860_merged.dta", replace

*Fix the random-number seed so that the permute draws below are reproducible.
*The permute results depend on the order in which the permute commands run after this point.
set seed 834138

*Table 1 columns 3-5, Table A9: Mobility by Parental Real Estate

*One pass per parental real-estate group: pargroup2==1 (at most 150) and pargroup2==2 (more than 150)
forvalues i = 1(1)2{
preserve


keep if pargroup2==`i'
drop if realprop_parents1850==.

*Restrict to individuals aged 16-18 in 1850
keep if age1850>=16 & age1850<=18

tab age1850
tab age1860
count
*Multiply each outcome and the constant by weight, so that the sums in the collapse below
*are weighted sums and the summed constant is the weighted number of observations
#delimit ;
local vars occschool1860 hhrealpropgteq400 hhperspropgteq372 male1850 school1860U constant married1860;
foreach x of local vars{;
gen `x'wt = `x'*weight;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum)  occschool1860 hhrealpropgteq400 hhperspropgteq372 school1860U male1850 constant married1860 (mean) statefip1850 hasnormalschool totweight maxweight, by(cty_fips1850 white1850);

#delimit cr

*From here on the data have one row per county by white1850 cell
su constant if white1850==1, d

*Flag cells whose rounded weighted count is below 10
gen constantrd = round(constant,1)
gen below10 = constantrd<10

*Diagnostics for the non-white cells before they are dropped
tab cty_fips1850 hasnormalschool if constant<=10 & white1850==0
tab cty_fips1850 hasnormalschool if constant>10 & constant~=. & white1850==0
tab statefip hasnormalschool if white1850==0 

drop if white1850==0

*Within each state and county type, the share of counties with fewer than 10 weighted
*observations; maxfracbelow10 is the larger of the shares across a state's county types
bysort statefip hasnormalschool: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip: egen maxfracbelow10 = max(fracbelow10)

tab statefip if maxfracbelow10>=.5

*Diagnostics on the crosswalk weights, overall and separately for states 12, 19, 48 and 55
tab maxweight
tab maxweight if (statefip==12|statefip==19|statefip==48|statefip==55)
tab maxweight if (statefip~=12 & statefip~=19 & statefip~=48 & statefip~=55)

tab totweight
tab totweight if (statefip==12|statefip==19|statefip==48|statefip==55)
tab totweight if (statefip~=12 & statefip~=19 & statefip~=48 & statefip~=55)

tab hasnormalschool totweight if statefip==12
tab hasnormalschool totweight if statefip==19
tab hasnormalschool totweight if statefip==48
tab hasnormalschool totweight if statefip==55

*Drop states for which 50% or more of their normal or asylum counties have sample size less than 10

drop if maxfracbelow10>=.5

*Weighted county means: weighted sum divided by the summed weight
local vars occschool1860 hhrealpropgteq400 hhperspropgteq372  male1850 married1860 school1860U
foreach x of local vars{
gen `x'mean = `x'/constant
}

sort cty_fips


*For each outcome: regress the county mean on the normal-school indicator with state fixed
*effects and robust standard errors, record the mean among counties with hasnormalschool==0,
*append the result to the output table, then run a permutation test of the coefficient that
*permutes the indicator within state 1000 times
local vars occschool1860 hhrealpropgteq400 hhperspropgteq372 male1850 school1860U married1860   
foreach x of local vars{

reghdfe `x'mean hasnormalschool if white1850==1, absorb(statefip1850) vce(robust)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1850 if e(sample)==1
outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1850) reps(1000): reg `x'mean hasnormalschool i.statefip1850 if white1850==1;

#delimit cr
}

*Principal components of the five outcome means; the first component pc1 is then used
*as an outcome in the same regression and permutation test
pca occschool1860mean hhrealpropgteq400mean hhperspropgteq372mean school1860Umean married1860mean if white1850==1
predict pc1 pc2 pc3 pc4 pc5, score

reghdfe pc1 hasnormalschool1850 if white1850==1, absorb(statefip1850) vce(robust)
su pc1 if e(sample)==1 & hasnormalschool1850==0
local mean = r(mean)
outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

su pc1 if white1850==1

#delimit ;
permute hasnormalschool1850 _b[hasnormalschool1850], strata(statefip1850) reps(1000): reg pc1 hasnormalschool1850 i.statefip1850 if white1850==1;

#delimit cr
restore
}

********************************
*Table 1 column 6, Tables A5, A8:  Occupations, and parental real estate distribution
********************************

*Work on a preserved copy; the person-level data are restored at the end of this section
preserve

drop if realprop_parents1850==.

*Restrict to individuals aged 16-18 in 1850
keep if age1850>=16 & age1850<=18

tab age1850
tab age1860
count
*Multiply each variable and the constant by weight, so that the sums in the collapse below
*are weighted sums and the summed constant is the weighted number of observations
#delimit ;
local vars occdiffhh parzero150 par1501000 pargt1000 unskhh craftoperhh whitecollarhh nonocchh farmer1860hh male1850 parentunsk parentcraftoper parentwhitecollar parentnonocc parent_farmer constant;
foreach x of local vars{;
gen `x'wt = `x'*weight;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum) occdiffhh parzero150 par1501000 pargt1000 unskhh craftoperhh whitecollarhh nonocchh farmer1860hh parentunsk parentcraftoper parentwhitecollar parentnonocc parent_farmer male1850 constant (mean)statefip1850 hasnormalschool, by(cty_fips1850 white1850);

#delimit cr

*From here on the data have one row per county by white1850 cell
su constant if white1850==1, d

*Diagnostics on small cells, for white and then non-white cells
tab cty_fips1850 hasnormalschool if constant<=10 & white1850==1
tab statefip hasnormalschool if white1850==1 

tab cty_fips1850 hasnormalschool if constant<=10 & white1850==0
tab statefip hasnormalschool if white1850==0 

drop if white1850==0
*Flag cells whose rounded weighted count is below 10, then compute, within each state and
*county type, the share of such counties; maxfracbelow10 is the larger share within a state
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip: egen maxfracbelow10 = max(fracbelow10)

*Drop states for which 50% or more of the counties of either type have fewer than 10 weighted observations
tab statefip if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

*Drop the states (12, 19, 48, 55) excluded for small samples in columns 3-5 of Table 1
drop if statefip==12|statefip==19|statefip==48|statefip==55

*Weighted county means: weighted sum divided by the summed weight
local vars  parzero150 par1501000 pargt1000 parent_farmer parentunsk parentcraftoper parentwhitecollar parentnonocc unskhh craftoperhh whitecollarhh nonocchh farmer1860hh male1850 
foreach x of local vars{
gen `x'mean = `x'/constant
}

*The occupational-change share uses the weighted count net of non-occupational households as its denominator
local vars occdiffhh 
foreach x of local vars{
gen `x'mean = `x'/(constant-nonocchh)
}

sort cty_fips

*For each variable: regress the county mean on the normal-school indicator with state fixed
*effects and robust standard errors, record the mean among counties with hasnormalschool==0,
*append the result to the output table, then run a permutation test of the coefficient that
*permutes the indicator within state 1000 times
local vars occdiffhh parzero150 par1501000 pargt1000 unskhh craftoperhh whitecollarhh nonocchh  farmer1860hh male1850 parentunsk parentcraftoper parentwhitecollar  parentnonocc parent_farmer  
foreach x of local vars{

reghdfe `x'mean hasnormalschool if white1850==1, absorb(statefip1850) vce(robust)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1850 if e(sample)==1
outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1850) reps(1000): reg `x'mean hasnormalschool i.statefip1850 if white1850==1;

#delimit cr
}
restore


*****
*Tables A6, A7: Male Occupations

*Work on a preserved copy; the person-level data are restored at the end of this section
preserve

drop if realprop_parents1850==.

*Restrict to males aged 16-18 in 1850
keep if age1850>=16 & age1850<=18
keep if male1850==1
*Keep only records with a non-missing occupation code for the father
keep if occ1950_pop1850~=.

tab age1850
tab age1860
count
*Multiply each variable and the constant by weight, so that the sums in the collapse below
*are weighted sums and the summed constant is the weighted number of observations
#delimit ;
local vars constant notfarmerdiffdad notwhitecollardiffdad notcraftoperdiffdad notunskdiffdad notnondiffdad farmerdiffdad whitecollardiffdad craftoperdiffdad unskdiffdad occdiffdad  unsk1860 craftoper1860 whitecollar1860 farmer1860 nonocc1860 dadunsk1850 dadcraftoper1850 dadwhitecollar1850 dad_farmer dadnonocc1850  ;
foreach x of local vars{;
gen `x'wt = `x'*weight;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum) notfarmerdiffdad notwhitecollardiffdad notcraftoperdiffdad notunskdiffdad notnondiffdad occdiffdad farmerdiffdad whitecollardiffdad craftoperdiffdad unskdiffdad  unsk1860 craftoper1860 whitecollar1860 farmer1860 nonocc1860 dadunsk1850 dadcraftoper1850 dadwhitecollar1850 dad_farmer dadnonocc1850 constant  (mean)statefip1850 hasnormalschool, by(cty_fips1850 white1850);

#delimit cr

*From here on the data have one row per county by white1850 cell
su constant if white1850==1, d
su constant if white1850==1 & statefip1850==12
tab cty_fips1850 hasnormalschool if constant<=10 & white1850==1
tab statefip hasnormalschool if white1850==1 

drop if white1850==0
*Flag cells whose rounded weighted count is below 10, then compute, within each state and
*county type, the share of such counties; maxfracbelow10 is the larger share within a state
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip: egen maxfracbelow10 = max(fracbelow10)

*Drop states for which 50% or more of the counties of either type have fewer than 10 weighted observations
tab statefip if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

*Drop the states (12, 19, 48, 55) excluded for small samples in columns 3-5 of Table 1

drop if statefip==12|statefip==19|statefip==48|statefip==55

*Weighted county means: weighted sum divided by the summed weight
local vars   unsk1860 craftoper1860 whitecollar1860 farmer1860 nonocc1860 dadunsk1850 dadcraftoper1850 dadwhitecollar1850 dad_farmer dadnonocc1850
foreach x of local vars{
gen `x'mean = `x'/constant
}

*The occupational-change shares use the weighted count net of those non-occupational in 1860 as the denominator
local vars notfarmerdiffdad notwhitecollardiffdad notcraftoperdiffdad notunskdiffdad notnondiffdad occdiffdad farmerdiffdad whitecollardiffdad craftoperdiffdad unskdiffdad 
foreach x of local vars{
gen `x'mean = `x'/(constant-nonocc1860)
}

sort cty_fips
*For each variable: regress the county mean on the normal-school indicator with state fixed
*effects and robust standard errors, record the mean among counties with hasnormalschool==0,
*append the result to the output table, then run a permutation test of the coefficient that
*permutes the indicator within state 1000 times
local vars notfarmerdiffdad notwhitecollardiffdad notcraftoperdiffdad notunskdiffdad notnondiffdad farmerdiffdad whitecollardiffdad craftoperdiffdad unskdiffdad  occdiffdad   unsk1860 craftoper1860 whitecollar1860 farmer1860 nonocc1860 dadunsk1850 dadcraftoper1850 dadwhitecollar1850 dad_farmer dadnonocc1850  
foreach x of local vars{

reghdfe `x'mean hasnormalschool if white1850==1, absorb(statefip1850) vce(robust)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1850 if e(sample)==1

outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

#delimit ;
di "`x'";
permute hasnormalschool _b[hasnormalschool], strata(statefip1850) reps(1000): reg `x'mean hasnormalschool i.statefip1850 if white1850==1;
#delimit cr
}

restore

*****
*Appendix B1 Text Statistic: Differential likelihood of living with parents in 1860 among individuals 6-8 in 1850

*The same county-level comparison is run on two samples, in this order:
*  1) lower SES children: parents' 1850 real property at most 150
*  2) all children with non-missing parental real property
*Each pass works on a preserved copy of the person-level data and restores it afterwards.
foreach sample in "realprop_parents1850<=150" "realprop_parents1850~=." {

preserve

keep if age1850>=6 & age1850<=8
keep if `sample'

*Multiply the outcome and the constant by weight, so that the sums in the collapse below
*are weighted sums and the summed constant is the weighted number of observations
foreach x in nonmissingrealpropparents1860 constant {
gen `x'wt = `x'*weight
drop `x'
rename `x'wt `x'
}

collapse (sum) nonmissingrealpropparents1860 constant (mean)statefip1850 hasnormalschool, by(cty_fips1850 white1850 male1850)

*Drop the states (12, 19, 48, 55) excluded for small samples in columns 3-5 of Table 1
drop if statefip==12|statefip==19|statefip==48|statefip==55

*Weighted share with non-missing parental real property in 1860, per county by race by sex cell
gen nonmissingrealpropparents1860m = nonmissingrealpropparents1860/constant

*Regress the share on the normal-school indicator with state fixed effects and robust
*standard errors, for white males (1) and then white females (0), and summarize the
*estimation sample each time
foreach m in 1 0 {
reghdfe nonmissingrealpropparents1860m hasnormalschool if white1850==1 & male1850==`m', absorb(statefip1850) vce(robust)
su nonmissingrealpropparents1860m if e(sample)==1 & hasnormalschool==0
su constant if e(sample)==1, d
tab statefip1850 if e(sample)==1
}

restore

}
*******************************
*Appendix B1 Text Statistics: Description of Census Tree merge
********************************

*Start again from the Census Tree link file and attach the 1850 census records
use "1850_1860.dta", clear

merge 1:m histid1850 using "1850_small.dta", gen(mergetree1850)

gen merged1850 = mergetree1850==3
drop if mergetree1850==1

gen school1850U = school1850==2

gen cty_fips = cty_fips1850

*---- All children aged 16-18 with non-missing parental real property ----
preserve
keep if realprop_parents1850!=.
drop if inlist(statefip, 12, 19, 48, 55)
keep if inrange(age1850, 16, 18)

gen constant = 1

egen taghistid1850 = tag(histid1850)

*Number with links in the Census Tree by sex
tab mergetree1850 sex1850 if white1850==1 & taghistid1850==1 & nonmissingrealpropparents==1, missing

*Weight the constant and the link indicator, then sum within county by race cell
foreach v in constant merged1850 {
    gen `v'wt = `v'*weight
    drop `v'
    rename `v'wt `v'
}

collapse (sum) school1850U constant merged1850 (mean) hasnormalschool statefip1850, by(cty_fips white1850)

*Weighted share of records linked in the Census Tree
gen fracmerged1850 = merged1850/constant

*Differential likelihood of merging to Census Tree between normal school and asylum counties
reghdfe fracmerged1850 hasnormalschool if white1850==1, absorb(statefip) vce(robust)
su fracmerged1850 if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

restore

*---- Footnote 51: Number of individuals that do not merge to the 1860 census ----
preserve

keep if realprop_parents1850!=.
drop if inlist(statefip, 12, 19, 48, 55)
keep if inrange(age1850, 16, 18)

*Keep only the records that merge to the Census Tree
keep if mergetree1850==3

*Merge to 1860 record
merge m:1 histid1860 using "1860_small.dta", gen(mergetree1860)
drop if mergetree1860==2

egen taghistid1850 = tag(histid1850)
tab mergetree1860 sex1850 if white1850==1 & taghistid1850==1, missing

restore

*---- Low socioeconomic status children: parents' real property at most 150 ----
preserve
keep if realprop_parents1850<=150
drop if inlist(statefip, 12, 19, 48, 55)
keep if inrange(age1850, 16, 18)

gen constant = 1

egen taghistid1850 = tag(histid1850)

*Number with links in the Census Tree by sex, among low socioeconomic status children
tab mergetree1850 sex1850 if white1850==1 & taghistid1850==1 & nonmissingrealpropparents==1

*Weight the constant and the link indicator, then sum within county by race cell
foreach v in constant merged1850 {
    gen `v'wt = `v'*weight
    drop `v'
    rename `v'wt `v'
}

collapse (sum)  constant merged1850 (mean) hasnormalschool statefip1850, by(cty_fips white1850)

*Weighted share of records linked in the Census Tree
gen fracmerged1850 = merged1850/constant

*Differential likelihood of merging to Census Tree between normal school and asylum counties, among low socioeconomic status children
reghdfe fracmerged1850 hasnormalschool if white1850==1, absorb(statefip) vce(robust)
su fracmerged1850 if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

restore

